

*** cleaning for balancing check (raw scores)


use "$path_data/data/main_data.dta", clear

*-------------------------------------------------------------------
**** Household characteristics used as control variables
*-------------------------------------------------------------------
************* Number of listed members in the hh
* no_member is the highest roster slot (checked from 10 down to 1) whose age
* field a3_age_<slot> is filled in; it stays missing when no slot has an age.
* NOTE(review): the roster reshaped further below is kept up to suffix _15,
* but only slots 1-10 are inspected here - confirm the cap at 10 is intended.
gen no_member = .
forvalues slot = 10(-1)1 {
	replace no_member = `slot' if missing(no_member) & !missing(a3_age_`slot')
}

***** Reshape the household roster to long form and collapse it to one row per
***** student: household size, number of adults, highest education (children,
***** adults, anyone), number of males, and last income (total and per member).
preserve
keep student_no member_id_1- a12_frequently_income_drawn_15
reshape long member_id_ a1_name_hh_members_ a2_gender_ a3_age_ a4_relationship_hh_head_ a6_merital_status_ a7_educ_completed_grade_ a8_main_activity_past_month_ a9_hours_worked_day_ a10_days_worked_week_ a11_last_income_drawn_ a12_frequently_income_drawn_, i(student_no) j(member_no)
* Empty roster slots carry no member id; drop them so _N counts real members.
drop if missing(member_id_)
drop member_no

* Adult = older than 17. Stata treats missing as larger than any number, so an
* unguarded "a3_age_>17" would classify members with unknown age as adults.
* Here the flag is left missing when age is missing: such members count towards
* num_member and highest_edu, but towards neither the adult nor the child
* statistics.
gen adult = (a3_age_ > 17) if !missing(a3_age_)
bys student_no: gen num_member = _N
bys student_no: egen num_adult = total(adult)

* Education codes of 17 and above are excluded from the maxima.
* NOTE(review): presumably those codes are non-grade categories - confirm
* against the A7_EDUC_COMPLETED_GRADE value label.
gen educ_temp = a7_educ_completed_grade_ if a7_educ_completed_grade_<17
gen educ_child = a7_educ_completed_grade_ if a7_educ_completed_grade_<17 & adult==0
gen educ_adult = a7_educ_completed_grade_ if a7_educ_completed_grade_<17 & adult==1
bys student_no: egen highest_edu_child = max(educ_child)
label values highest_edu_child A7_EDUC_COMPLETED_GRADE
bys student_no: egen highest_edu_adult = max(educ_adult)
label values highest_edu_adult A7_EDUC_COMPLETED_GRADE

* -1 stands for "no valid education record in this group", which keeps the
* comparison below well defined when one group is empty.
replace highest_edu_adult=-1 if missing(highest_edu_adult)
replace highest_edu_child=-1 if missing(highest_edu_child)
gen educated_adult = cond(highest_edu_adult>highest_edu_child,1,0)
bys student_no: egen highest_edu = max(educ_temp)
label values highest_edu A7_EDUC_COMPLETED_GRADE

* NOTE(review): summing a2_gender_ counts males only if male is coded 1 and
* every other category 0 - confirm the coding.
bys student_no: egen num_male = total(a2_gender_)

**** last income, last income per member
bys student_no: egen last_income = total(a11_last_income_drawn_)
gen last_income_per_member = last_income/num_member

* Every kept variable is constant within student, so dropping duplicates
* leaves exactly one row per student_no for the 1:1 merge.
keep student_no num_member num_adult highest_edu_child highest_edu_adult educated_adult highest_edu last_income last_income_per_member num_male
duplicates drop
save "$path_data/temp/hh_a1_a12_merge.dta",replace
restore
merge 1:1 student_no using "$path_data/temp/hh_a1_a12_merge.dta"
drop _merge

***** Attach the latest PSC results (one record per student); the merge
***** indicator is not kept.
merge 1:1 student_no using "$path_data/data/psc_score.dta", nogenerate

* Every student of school 13 is recorded as grade 2.
replace grade = 2 if school_no == 13


****** "Potential error group" indicators (1 = listed school/student, 0 = otherwise)
gen wrongDT = inlist(school_no, 13, 14, 18, 19, 20)
label variable wrongDT "schools assigning wrong levels of DT"

gen cheating = inlist(student_no, 428, 429, 518, 528, 530, 622, 626, 704, 707, 714, 715, 716, 718, 722, 724, 726, 1006, 1013, 1020, 1021, 1201, 1208, 1209, 1216, 1218, 1225, 1819)
label variable cheating "students suspected to have been cheating"

gen wrongTshort = inlist(school_no, 9, 24, 25)
label variable wrongTshort "schools which restricted time too short"

gen wrongTlong = inlist(school_no, 8, 26, 31)
label variable wrongTlong "schools which did not comply with time restriction"

****** Area dummies: one indicator (branch1, branch2, ...) per value of name_branch
tabulate name_branch, generate(branch)

****** Teacher evaluation score with the sign flipped, baseline and endline
foreach wave in bl el {
	gen eval_adjust_`wave' = 0 - evaluation_`wave'
}

****** Non-cognitive survey: per-item "answered" flags
destring ptsel_survey*, replace

* adjust_1_pts<wave>_survey<q> is 1 when item q was answered and 0 when it is
* missing; all 27 baseline flags are created first, then the 27 endline flags.
foreach wave in bl el {
	forvalues q = 1/27 {
		qui gen adjust_1_pts`wave'_survey`q' = !missing(pts`wave'_survey`q')
	}
}
**** Number of answered items per scale and wave, built from the 0/1
**** adjust_1_pts<wave>_survey<q> flags.
* Item numbers belonging to each scale.
local cpcs_items  2 3 4 5 6 7 8 9 10 11
local rosen_items 2 3 4 6 8 9 10 11
local grit_items  13 14 15

foreach scale in cpcs rosen grit {
	foreach wave in bl el {
		* Collect this scale's answered-flags for this wave.
		local flags
		foreach q of local `scale'_items {
			local flags `flags' adjust_1_pts`wave'_survey`q'
		}
		egen `scale'_`wave'_1 = rowtotal(`flags')
	}
}

label variable cpcs_bl_1 "number of answered cpcs baseline survey"
label variable cpcs_el_1 "number of answered cpcs endline survey"
label variable rosen_bl_1 "number of answered Rosenberg baseline survey"
label variable rosen_el_1 "number of answered Rosenberg endline survey"
label variable grit_bl_1 "number of answered GRIT baseline survey"
label variable grit_el_1 "number of answered GRIT endline survey"

**** Answered items over all 27 questions
* The labels go on the noncog_* counts; attaching them to grit_*_1 (as before)
* overwrote the GRIT labels and left the noncog counts unlabelled.
egen noncog_bl_1 = rowtotal(adjust_1_ptsbl_survey*)
label variable noncog_bl_1 "number of answered all baseline survey"

egen noncog_el_1 = rowtotal(adjust_1_ptsel_survey*)
label variable noncog_el_1 "number of answered all endline survey"


* Take standardized average (by grade, by question)
* change the sign of the scale for [POSITIVE question] so that the larger the aggregate test score, the more self-confident the child is
capture program drop gennoncog
program gennoncog
	* gennoncog condition1 condition2
	*
	* Creates adjust_ptsbl_survey1-27 and adjust_ptsel_survey1-27: each survey
	* item standardized (mean 0, sd 1) separately within grade 2 and grade 4.
	*   condition1  extra text appended to the -if- that selects the baseline
	*               reference sample for mean/sd (e.g. "& cheating==0"); " " = none
	*   condition2  same for the endline items
	* Items 4 6 8 9 keep their original direction; all other items are
	* reverse-coded as 5 - answer before standardizing.
	* Observations whose grade is neither 2 nor 4 get missing values.
	* Requires grade, ptsbl_survey1-27 and ptsel_survey1-27; fails if any
	* adjust_pts* item already exists (callers drop them between calls).
	args condition1 condition2

	local reversed 1 2 3 5 7 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
	local direct 4 6 8 9
	* Scalars hold the group mean/sd, so no helper variables are created and
	* no "drop temp*" (which could delete unrelated variables) is needed.
	tempname mu sigma

	foreach wave in bl el {
		if "`wave'" == "bl" {
			local extra `"`condition1'"'
			local wavename baseline
		}
		else {
			local extra `"`condition2'"'
			local wavename endline
			* Endline answers may have been read as strings.
			destring ptsel_survey*, replace
		}
		foreach coding in reversed direct {
			foreach i of local `coding' {
				local item adjust_pts`wave'_survey`i'
				if "`coding'" == "reversed" {
					qui gen `item' = 5 - pts`wave'_survey`i'
				}
				else {
					qui gen `item' = pts`wave'_survey`i'
				}
				* Standardize within each grade, using the (optionally
				* restricted) reference sample for the mean and sd.
				foreach g in 2 4 {
					sum `item' if grade==`g' `extra', detail
					scalar `mu' = r(mean)
					scalar `sigma' = r(sd)
					replace `item' = (`item' - `mu') / `sigma' if grade==`g'
				}
				* No reference distribution exists for other grades.
				replace `item' = . if !inlist(grade, 2, 4)
				label variable `item' "adjust standardized value of `wavename' survey `i'"
			}
		}
	}
end

* Standardize PTS problem scores 2-6 in place, separately within grade 2 and
* grade 4, for baseline (ptsbl_) and then endline (ptsel_). Observations in
* any other grade end up missing because their mean/sd holders stay missing.
foreach wave in bl el {
	forvalues part = 2/6 {
		local score pts`wave'_problem`part'_score
		gen temp = .
		gen temp1 = .
		foreach g in 2 4 {
			sum `score' if grade==`g', detail
			replace temp = r(mean) if grade==`g'
			replace temp1 = r(sd) if grade==`g'
		}
		replace `score' = (`score' - temp)/temp1
		label variable `score' "adjust standardized value of pts component `part'"
		drop temp*
	}
}

**** Scale composites: row means of the grade-standardized survey items.
* gennoncog creates every adjust_pts* item, so the items are dropped after each
* scale and regenerated before the next one.
local cpcs_items  2 3 4 5 6 7 8 9 10 11
local rosen_items 2 3 4 6 8 9 10 11
local grit_items  13 14 15

**** CPCS, Rosenberg, GRIT
foreach scale in cpcs rosen grit {
	gennoncog " " " "
	foreach wave in bl el {
		local members
		foreach q of local `scale'_items {
			local members `members' adjust_pts`wave'_survey`q'
		}
		egen `scale'_`wave' = rowmean(`members')
	}
	drop adjust_pts*
}

label variable cpcs_bl "standardized average cpcs baseline survey"
label variable cpcs_el "standardized average cpcs endline survey"
label variable rosen_bl "standardized average Rosenberg baseline survey"
label variable rosen_el "standardized average Rosenberg endline survey"
label variable grit_bl "standardized average GRIT baseline survey"
label variable grit_el "standardized average GRIT endline survey"

* Disabled completeness filters (would blank a composite unless every item of
* the scale was answered):
*replace cpcs_bl=. if cpcs_bl_1<10
*replace cpcs_el=. if cpcs_el_1<10
*replace rosen_bl=. if rosen_bl_1<8
*replace rosen_el=. if rosen_el_1<8
*replace grit_bl=. if grit_bl_1<3
*replace grit_el=. if grit_el_1<3

**** Average over all 27 questions
* Unlike the scales above, the overall average is kept only when all 27 items
* are present; the adjust_pts* items stay in the data afterwards.
gennoncog " " " "
foreach wave in bl el {
	egen noncog_`wave' = rowmean(adjust_pts`wave'_survey*)
	foreach item of varlist adjust_pts`wave'_survey* {
		replace noncog_`wave'=. if missing(`item')
	}
}
label variable noncog_bl "standardized average all baseline survey"
label variable noncog_el "standardized average all endline survey"
****** Convert into per minute score (score divided by time, parts P1-P3)
forvalues p = 1/3 {
	gen DTSbl_per_min_P`p' = score_DTScoreP`p' / time_DTScoreP`p'
	gen DTSel_per_min_P`p' = score_Post_DTScoreP`p' / time_Post_DTScoreP`p'
}

* Grade-specific DT measures: part P2 for grade 2, part P3 for grade 4,
* missing for any other grade.
gen DT_score_pre = cond(grade==2, score_DTScoreP2, cond(grade==4, score_DTScoreP3, .))
gen DT_score_post = cond(grade==2, score_Post_DTScoreP2, cond(grade==4, score_Post_DTScoreP3, .))

gen DT_time_pre = cond(grade==2, time_DTScoreP2, cond(grade==4, time_DTScoreP3, .))
gen DT_time_post = cond(grade==2, time_Post_DTScoreP2, cond(grade==4, time_Post_DTScoreP3, .))

gen DT_per_min_pre = cond(grade==2, DTSbl_per_min_P2, cond(grade==4, DTSbl_per_min_P3, .))
gen DT_per_min_post = cond(grade==2, DTSel_per_min_P2, cond(grade==4, DTSel_per_min_P3, .))

* Endline minus baseline changes
gen dif_PTS_cog = ptsel_overall_score - ptsbl_overall_score
gen dif_DT_score = DT_score_post - DT_score_pre
gen dif_DT_time = DT_time_post - DT_time_pre
gen dif_DT_per_min = DT_per_min_post - DT_per_min_pre

* Indicator for the listed schools (named "longer_session" by the authors)
gen longer_session = inlist(school_no, 6, 7, 9, 19, 16, 18)

*log using "temp/sum_stats.log"
******:: Baseline summary statistics by treatment arm
bysort treatment: sum(DT_score_pre DT_time_pre DT_per_min_pre ptsbl_overall_score rosen_bl cpcs_bl cheating wrongTshort wrongTlong longer_session)
******:: Endline summary statistics by treatment arm
bysort treatment: sum(DT_score_post DT_time_post DT_per_min_post ptsel_overall_score rosen_el cpcs_el)
*log close

* Numeric gender: 0 = "Male", 1 = "Female", missing for any other text.
gen gender = cond(student_gender == "Male", 0, cond(student_gender == "Female", 1, .))
bysort treatment: sum(gender)
bysort treatment: sum(longer_session)

* Manual corrections for individual students (applied after the summaries above)
replace gender = 0 if inlist(student_no, 701, 1001, 1101)
replace gender = 1 if student_no == 1224





**** PSC variables
* have_psc = 1 when a strictly positive GPA is recorded, 0 otherwise.
gen have_psc = cond(!missing(GPA)&GPA>0,1,0)
label define grading 1 "A+" 2 "A" 3 "A-" 4 "B" 5 "B-" 6 "C" 7 "D" 8 "F"

* For each subject (string letter grade on input):
*   psc_pass_<stem>  1 if the grade is not "F", 0 if "F" (only where have_psc==1)
*   <subject>        recoded to the numeric "grading" scale, "a" -> missing
*   <stem>_a/_b/_c   1 if the code is below 4 / 6 / 7, 0 if above 3 / 5 / 6
*                    (only where have_psc==1)
* NOTE(review): psc_pass_<stem> is 1 for grade "a" or blank when have_psc==1,
* and a missing code counts as "above" the cut-off so <stem>_a/_b/_c become 0
* for those students - confirm both are intended.
* NOTE(review): only BangladeshandWorld recodes "B-"; a "B-" in another subject
* would stop destring from converting it - confirm none occur.
local subjects Mathematics BangladeshandWorld Bangla English Science Religion
local stems    math bangla bang eng science religion

* Letter grades shared by all subjects and their numeric codes
local letters A+ A A- B C D F
local codes   1 2 3 4 6 7 8

* Achievement tiers and the code below which the tier is reached
local tiers a b c
local cuts  4 6 7

forvalues s = 1/6 {
	local subj : word `s' of `subjects'
	local stem : word `s' of `stems'

	*** `subj'
	gen psc_pass_`stem' = cond(`subj'!="F"&have_psc==1,1,.)
	replace psc_pass_`stem' = 0 if `subj'=="F"&have_psc==1

	forvalues k = 1/7 {
		local letter : word `k' of `letters'
		local code : word `k' of `codes'
		replace `subj' = "`code'" if `subj' == "`letter'"
	}
	* Subject-specific spellings found in the raw file
	if "`subj'" == "BangladeshandWorld" {
		replace `subj' = "5" if `subj' == "B-"
	}
	if "`subj'" == "Bangla" {
		replace `subj' = "3" if `subj' == "A- "
	}
	if "`subj'" == "English" {
		replace `subj' = "3" if `subj' == "A_"
	}
	replace `subj' = "." if `subj' == "a"
	destring `subj', replace
	label values `subj' grading

	forvalues t = 1/3 {
		local tier : word `t' of `tiers'
		local cut : word `t' of `cuts'
		local below = `cut' - 1
		gen `stem'_`tier' = 1 if `subj'<`cut'&have_psc==1
		replace `stem'_`tier' = 0 if `subj'>`below'&have_psc==1
	}

	* Grade points are derived for Mathematics only.
	if "`stem'" == "math" {
		local points 5 4 3.5 3 2 1 0
		gen math_score = .
		forvalues k = 1/7 {
			local code : word `k' of `codes'
			local pts : word `k' of `points'
			replace math_score = `pts' if Mathematics == `code'
		}
	}
}


* Raw (unstandardized) item scores for the Rosenberg and CPCS scales.
* For every item q and wave (bl, then el):
*   raw_pts<wave>_survey<q>       answer, reverse-coded as 5 - answer except
*                                 for items 4 6 8 9
*   missing_in_pts<wave>_<q>      1 if the original answer is ".", else 0
*   mean_raw_pts<wave>_survey<q>  full-sample mean of the raw item
* Missing answers are then replaced by that full-sample mean.
local reversed 1 2 3 5 7 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
local direct 4 6 8 9

foreach wave in bl el {
	if "`wave'" == "el" {
		destring ptsel_survey*, replace
	}
	foreach coding in reversed direct {
		foreach q of local `coding' {
			local src pts`wave'_survey`q'
			if "`coding'" == "reversed" {
				qui gen raw_`src' = 5-`src'
			}
			else {
				qui gen raw_`src' = `src'
			}
			qui gen missing_in_pts`wave'_`q' = (`src' == .)
			qui egen mean_raw_`src' = mean(raw_`src')
			qui replace raw_`src' = mean_raw_`src' if `src' == .
		}
	}
}


	
	
	
**** Raw CPCS and Rosenberg totals: sums of the mean-imputed raw items
local cpcs_items  2 3 4 5 6 7 8 9 10 11
local rosen_items 2 3 4 6 8 9 10 11

foreach scale in cpcs rosen {
	foreach wave in bl el {
		* Build "item + item + ..." in item order.
		local total
		foreach q of local `scale'_items {
			if "`total'" == "" {
				local total raw_pts`wave'_survey`q'
			}
			else {
				local total `total' + raw_pts`wave'_survey`q'
			}
		}
		gen raw_`scale'_`wave' = `total'
	}
}

* Lists of the baseline missing-flags per scale (not used within this section)
local missing_cpcs missing_in_ptsbl_2 missing_in_ptsbl_3 missing_in_ptsbl_4 missing_in_ptsbl_5 missing_in_ptsbl_6 missing_in_ptsbl_7 missing_in_ptsbl_8 missing_in_ptsbl_9 missing_in_ptsbl_10 missing_in_ptsbl_11 
local missing_rosen missing_in_ptsbl_2 missing_in_ptsbl_3 missing_in_ptsbl_4 missing_in_ptsbl_6 missing_in_ptsbl_8 missing_in_ptsbl_9 missing_in_ptsbl_10 missing_in_ptsbl_11 

* missing_<scale>_<wave> = 1 when at least one item of the scale was missing
* (and therefore imputed) in that wave, 0 otherwise.
foreach wave in bl el {
	foreach scale in cpcs rosen {
		local nmiss
		foreach q of local `scale'_items {
			if "`nmiss'" == "" {
				local nmiss missing_in_pts`wave'_`q'
			}
			else {
				local nmiss `nmiss' + missing_in_pts`wave'_`q'
			}
		}
		gen missing_`scale'_`wave' = (`nmiss') != 0
	}
}








* Give the variables _pre/_post names and save the attrition-check file.
* Each pair is "current_name new_name".
* NOTE(review): gender_pre is the original text variable student_gender, not
* the numeric 0/1 gender built earlier - confirm that is the intended source.
foreach pair in "ptsbl_overall_score pts_pre" "raw_rosen_bl rosen_pre" "raw_cpcs_bl cpcs_pre" "ptsel_overall_score pts_post" "raw_rosen_el rosen_post" "raw_cpcs_el cpcs_post" "longer_session longer_session_post" "cheating cheating_pre" "wrongTshort wrongTshort_pre" "wrongTlong wrongTlong_pre" "student_gender gender_pre" "age age_pre" "age_sq age_sq_pre" {
	local old : word 1 of `pair'
	local new : word 2 of `pair'
	rename `old' `new'
}

save "$path_data/data/temp/attrition_check.dta", replace







*** cleaning for main analysis (standardised scores)




*** cleaning (basically the same as 0_cleaning, but this version standardises the scores)

use "$path_data/data/main_data.dta", clear

*-------------------------------------------------------------------
**** Household characteristics used as control variables
*-------------------------------------------------------------------
************* Number of listed members in the hh
* no_member is the highest roster slot (checked from 10 down to 1) whose age
* field a3_age_<slot> is filled in; it stays missing when no slot has an age.
* NOTE(review): the roster reshaped further below is kept up to suffix _15,
* but only slots 1-10 are inspected here - confirm the cap at 10 is intended.
gen no_member = .
forvalues slot = 10(-1)1 {
	replace no_member = `slot' if missing(no_member) & !missing(a3_age_`slot')
}

***** Reshape the household roster to long form and collapse it to one row per
***** student: household size, number of adults, highest education (children,
***** adults, anyone), number of males, and last income (total and per member).
preserve
keep student_no member_id_1- a12_frequently_income_drawn_15
reshape long member_id_ a1_name_hh_members_ a2_gender_ a3_age_ a4_relationship_hh_head_ a6_merital_status_ a7_educ_completed_grade_ a8_main_activity_past_month_ a9_hours_worked_day_ a10_days_worked_week_ a11_last_income_drawn_ a12_frequently_income_drawn_, i(student_no) j(member_no)
* Empty roster slots carry no member id; drop them so _N counts real members.
drop if missing(member_id_)
drop member_no

* Adult = older than 17. Stata treats missing as larger than any number, so an
* unguarded "a3_age_>17" would classify members with unknown age as adults.
* Here the flag is left missing when age is missing: such members count towards
* num_member and highest_edu, but towards neither the adult nor the child
* statistics.
gen adult = (a3_age_ > 17) if !missing(a3_age_)
bys student_no: gen num_member = _N
bys student_no: egen num_adult = total(adult)

* Education codes of 17 and above are excluded from the maxima.
* NOTE(review): presumably those codes are non-grade categories - confirm
* against the A7_EDUC_COMPLETED_GRADE value label.
gen educ_temp = a7_educ_completed_grade_ if a7_educ_completed_grade_<17
gen educ_child = a7_educ_completed_grade_ if a7_educ_completed_grade_<17 & adult==0
gen educ_adult = a7_educ_completed_grade_ if a7_educ_completed_grade_<17 & adult==1
bys student_no: egen highest_edu_child = max(educ_child)
label values highest_edu_child A7_EDUC_COMPLETED_GRADE
bys student_no: egen highest_edu_adult = max(educ_adult)
label values highest_edu_adult A7_EDUC_COMPLETED_GRADE

* -1 stands for "no valid education record in this group", which keeps the
* comparison below well defined when one group is empty.
replace highest_edu_adult=-1 if missing(highest_edu_adult)
replace highest_edu_child=-1 if missing(highest_edu_child)
gen educated_adult = cond(highest_edu_adult>highest_edu_child,1,0)
bys student_no: egen highest_edu = max(educ_temp)
label values highest_edu A7_EDUC_COMPLETED_GRADE

* NOTE(review): summing a2_gender_ counts males only if male is coded 1 and
* every other category 0 - confirm the coding.
bys student_no: egen num_male = total(a2_gender_)

**** last income, last income per member
bys student_no: egen last_income = total(a11_last_income_drawn_)
gen last_income_per_member = last_income/num_member

* Every kept variable is constant within student, so dropping duplicates
* leaves exactly one row per student_no for the 1:1 merge.
keep student_no num_member num_adult highest_edu_child highest_edu_adult educated_adult highest_edu last_income last_income_per_member num_male
duplicates drop
save "$path_data/temp/hh_a1_a12_merge.dta",replace
restore
merge 1:1 student_no using "$path_data/temp/hh_a1_a12_merge.dta"
drop _merge

***** Attach the latest PSC results (one record per student); the merge
***** indicator is not kept.
merge 1:1 student_no using "$path_data/data/psc_score.dta", nogenerate

* Every student of school 13 is recorded as grade 2.
replace grade = 2 if school_no == 13



****** "Potential error group" indicators (1 = listed school/student, 0 = otherwise)
gen wrongDT = inlist(school_no, 13, 14, 18, 19, 20)
label variable wrongDT "schools assigning wrong levels of DT"

gen cheating = inlist(student_no, 428, 429, 518, 528, 530, 622, 626, 704, 707, 714, 715, 716, 718, 722, 724, 726, 1006, 1013, 1020, 1021, 1201, 1208, 1209, 1216, 1218, 1225, 1819)
label variable cheating "students suspected to have been cheating"

gen wrongTshort = inlist(school_no, 9, 24, 25)
label variable wrongTshort "schools which restricted time too short"

gen wrongTlong = inlist(school_no, 8, 26, 31)
label variable wrongTlong "schools which did not comply with time restriction"

****** Area dummies: one indicator (branch1, branch2, ...) per value of name_branch
tabulate name_branch, generate(branch)

****** Teacher evaluation score with the sign flipped, baseline and endline
foreach wave in bl el {
	gen eval_adjust_`wave' = 0 - evaluation_`wave'
}

****** Non-cognitive survey: per-item "answered" flags
destring ptsel_survey*, replace

* Total number of answered survey questions is built from these flags:
* adjust_1_pts<wave>_survey<q> is 1 when item q was answered and 0 when it is
* missing; all 27 baseline flags are created first, then the 27 endline flags.
foreach wave in bl el {
	forvalues q = 1/27 {
		gen adjust_1_pts`wave'_survey`q' = !missing(pts`wave'_survey`q')
	}
}
**** Number of answered items per scale and wave, built from the 0/1
**** adjust_1_pts<wave>_survey<q> flags.
* Item numbers belonging to each scale.
local cpcs_items  2 3 4 5 6 7 8 9 10 11
local rosen_items 2 3 4 6 8 9 10 11
local grit_items  13 14 15

foreach scale in cpcs rosen grit {
	foreach wave in bl el {
		* Collect this scale's answered-flags for this wave.
		local flags
		foreach q of local `scale'_items {
			local flags `flags' adjust_1_pts`wave'_survey`q'
		}
		egen `scale'_`wave'_1 = rowtotal(`flags')
	}
}

label variable cpcs_bl_1 "number of answered cpcs baseline survey"
label variable cpcs_el_1 "number of answered cpcs endline survey"
label variable rosen_bl_1 "number of answered Rosenberg baseline survey"
label variable rosen_el_1 "number of answered Rosenberg endline survey"
label variable grit_bl_1 "number of answered GRIT baseline survey"
label variable grit_el_1 "number of answered GRIT endline survey"

**** Answered items over all 27 questions
* The labels go on the noncog_* counts; attaching them to grit_*_1 (as before)
* overwrote the GRIT labels and left the noncog counts unlabelled.
egen noncog_bl_1 = rowtotal(adjust_1_ptsbl_survey*)
label variable noncog_bl_1 "number of answered all baseline survey"

egen noncog_el_1 = rowtotal(adjust_1_ptsel_survey*)
label variable noncog_el_1 "number of answered all endline survey"



* Take standardized average (by grade, by question)
* change the sign of the scale for [POSITIVE question] so that the larger the aggregate test score, the more self-confident the child is
capture program drop gennoncog
program gennoncog
	* gennoncog condition1 condition2
	*
	* Creates adjust_ptsbl_survey1-27 and adjust_ptsel_survey1-27: each survey
	* item standardized (mean 0, sd 1) separately within grade 2 and grade 4.
	*   condition1  extra text appended to the -if- that selects the baseline
	*               reference sample for mean/sd (e.g. "& cheating==0"); " " = none
	*   condition2  same for the endline items
	* Items 4 6 8 9 keep their original direction; all other items are
	* reverse-coded as 5 - answer before standardizing.
	* Observations whose grade is neither 2 nor 4 get missing values.
	* Requires grade, ptsbl_survey1-27 and ptsel_survey1-27; fails if any
	* adjust_pts* item already exists (callers drop them between calls).
	args condition1 condition2

	local reversed 1 2 3 5 7 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
	local direct 4 6 8 9
	* Scalars hold the group mean/sd, so no helper variables are created and
	* no "drop temp*" (which could delete unrelated variables) is needed.
	tempname mu sigma

	foreach wave in bl el {
		if "`wave'" == "bl" {
			local extra `"`condition1'"'
			local wavename baseline
		}
		else {
			local extra `"`condition2'"'
			local wavename endline
			* Endline answers may have been read as strings.
			destring ptsel_survey*, replace
		}
		foreach coding in reversed direct {
			foreach i of local `coding' {
				local item adjust_pts`wave'_survey`i'
				if "`coding'" == "reversed" {
					gen `item' = 5 - pts`wave'_survey`i'
				}
				else {
					gen `item' = pts`wave'_survey`i'
				}
				* Standardize within each grade, using the (optionally
				* restricted) reference sample for the mean and sd.
				foreach g in 2 4 {
					sum `item' if grade==`g' `extra', detail
					scalar `mu' = r(mean)
					scalar `sigma' = r(sd)
					replace `item' = (`item' - `mu') / `sigma' if grade==`g'
				}
				* No reference distribution exists for other grades.
				replace `item' = . if !inlist(grade, 2, 4)
				label variable `item' "adjust standardized value of `wavename' survey `i'"
			}
		}
	}
end

* Standardize PTS problem scores 2-6 in place, separately within grade 2 and
* grade 4, for baseline (ptsbl_) and then endline (ptsel_). Observations in
* any other grade end up missing because their mean/sd holders stay missing.
foreach wave in bl el {
	forvalues part = 2/6 {
		local score pts`wave'_problem`part'_score
		gen temp = .
		gen temp1 = .
		foreach g in 2 4 {
			sum `score' if grade==`g', detail
			replace temp = r(mean) if grade==`g'
			replace temp1 = r(sd) if grade==`g'
		}
		replace `score' = (`score' - temp)/temp1
		label variable `score' "adjust standardized value of pts component `part'"
		drop temp*
	}
}

* Non-cognitive indices. Each index is the row mean of a subset of the
* adjust_pts survey items that gennoncog creates. gennoncog is re-run before
* every index, and the items are dropped afterwards so the next call can
* recreate them. The items from the last call are kept because later code
* still uses them.

* Wave suffix -> wording used in the variable labels.
local wavelab_bl baseline
local wavelab_el endline

**** Construct CPCS (items 2 to 11)
gennoncog " " " "
foreach wave in bl el {
	local items
	forvalues q = 2/11 {
		local items `items' adjust_pts`wave'_survey`q'
	}
	egen cpcs_`wave' = rowmean(`items')
	label variable cpcs_`wave' "standardized average cpcs `wavelab_`wave'' survey"
}
*replace cpcs_bl=. if cpcs_bl_1<10
*replace cpcs_el=. if cpcs_el_1<10
drop adjust_pts*

**** Construct Rosenberg (items 2 3 4 6 8 9 10 11)
gennoncog " " " "
foreach wave in bl el {
	local items
	foreach q of numlist 2 3 4 6 8/11 {
		local items `items' adjust_pts`wave'_survey`q'
	}
	egen rosen_`wave' = rowmean(`items')
	label variable rosen_`wave' "standardized average Rosenberg `wavelab_`wave'' survey"
}
*replace rosen_bl=. if rosen_bl_1<8
*replace rosen_el=. if rosen_el_1<8
drop adjust_pts*

**** Construct GRIT (items 13 to 15)
gennoncog " " " "
foreach wave in bl el {
	local items
	forvalues q = 13/15 {
		local items `items' adjust_pts`wave'_survey`q'
	}
	egen grit_`wave' = rowmean(`items')
	label variable grit_`wave' "standardized average GRIT `wavelab_`wave'' survey"
}
*replace grit_bl=. if grit_bl_1<3
*replace grit_el=. if grit_el_1<3
drop adjust_pts*

**** Average over all survey items
gennoncog " " " "
foreach wave in bl el {
	egen noncog_`wave' = rowmean(adjust_pts`wave'_survey*)
	label variable noncog_`wave' "standardized average all `wavelab_`wave'' survey"
}

* The overall index is only kept for students who answered every item:
* blank it as soon as one item of the same wave is missing.
foreach wave in bl el {
	tempvar n_missing
	egen `n_missing' = rowmiss(adjust_pts`wave'_survey*)
	replace noncog_`wave' = . if `n_missing' > 0
	drop `n_missing'
}
****** Convert into per minute score
* For each part P1 to P3: score divided by time, then z-scored within grade.
* The endline value is standardized with the BASELINE mean and sd of the
* same grade, because r(mean) and r(sd) come from the baseline summarize.
forvalues p = 1/3 {
	gen DTSbl_per_min_P`p' = score_DTScoreP`p'/time_DTScoreP`p'
	gen DTSel_per_min_P`p' = score_Post_DTScoreP`p'/time_Post_DTScoreP`p'
	foreach g in 2 4 {
		summarize DTSbl_per_min_P`p' if grade == `g'
		replace DTSbl_per_min_P`p' = (DTSbl_per_min_P`p' - r(mean))/r(sd) if grade == `g'
		replace DTSel_per_min_P`p' = (DTSel_per_min_P`p' - r(mean))/r(sd) if grade == `g'
	}
	*gen dif_DTS_per_min_P`p' = DTSel_per_min_P`p'-DTSbl_per_min_P`p'
}

* Same within-grade standardization, applied in place to the raw time and
* raw score variables. Again the endline (Post) value uses baseline moments.
forvalues p = 1/3 {
	foreach m in time score {
		foreach g in 2 4 {
			summarize `m'_DTScoreP`p' if grade == `g'
			replace `m'_DTScoreP`p' = (`m'_DTScoreP`p' - r(mean))/r(sd) if grade == `g'
			replace `m'_Post_DTScoreP`p' = (`m'_Post_DTScoreP`p' - r(mean))/r(sd) if grade == `g'
		}
	}
}
* Grade-specific DT outcome: part P2 for grade 2, part P3 for grade 4,
* missing for any other grade.
foreach m in score time {
	gen DT_`m'_pre = cond(grade == 2, `m'_DTScoreP2, cond(grade == 4, `m'_DTScoreP3, .))
	gen DT_`m'_post = cond(grade == 2, `m'_Post_DTScoreP2, cond(grade == 4, `m'_Post_DTScoreP3, .))
}
gen DT_per_min_pre = cond(grade == 2, DTSbl_per_min_P2, cond(grade == 4, DTSbl_per_min_P3, .))
gen DT_per_min_post = cond(grade == 2, DTSel_per_min_P2, cond(grade == 4, DTSel_per_min_P3, .))

* Overall PTS score: z-score within grade, endline scaled by baseline moments.
foreach g in 2 4 {
	summarize ptsbl_overall_score if grade == `g'
	replace ptsbl_overall_score = (ptsbl_overall_score - r(mean))/r(sd) if grade == `g'
	replace ptsel_overall_score = (ptsel_overall_score - r(mean))/r(sd) if grade == `g'
}

* Endline minus baseline changes.
gen dif_PTS_cog = ptsel_overall_score - ptsbl_overall_score
foreach m in score time per_min {
	gen dif_DT_`m' = DT_`m'_post - DT_`m'_pre
}

* z-score each non-cognitive index within grade (endline uses the baseline
* mean and sd of the same grade), then store the endline minus baseline change.
foreach idx in cpcs rosen grit noncog {
	foreach g in 2 4 {
		summarize `idx'_bl if grade == `g'
		replace `idx'_bl = (`idx'_bl - r(mean))/r(sd) if grade == `g'
		replace `idx'_el = (`idx'_el - r(mean))/r(sd) if grade == `g'
	}
	gen dif_`idx' = `idx'_el - `idx'_bl
}

* Item-level change for each of the 27 survey questions.
forvalues q = 1/27 {
	gen dif_PTS_q`q' = adjust_ptsel_survey`q' - adjust_ptsbl_survey`q'
}

***** Interact treatment with gender and initial cognitive level
******** Split students at the within-grade median of a baseline cognitive score.
* above_dtcog is based on DT_per_min_pre, above_ptcog on ptsbl_overall_score.
* Students with a missing baseline score, or a grade other than 2 or 4,
* keep a missing indicator.
local src_dtcog DT_per_min_pre
local src_ptcog ptsbl_overall_score
local tag_dtcog DT
local tag_ptcog PT
foreach k in dtcog ptcog {
	local base `src_`k''
	gen above_`k' = .
	foreach g in 2 4 {
		summarize `base' if grade == `g', detail
		replace above_`k' = (`base' > r(p50)) if grade == `g' & !missing(`base')
	}
	label variable above_`k' "1 if initial cognitive score - `tag_`k'' - above the median"
}

* Gender arrives as text; recode to 0 = Female, 1 = Male and make it numeric.
replace student_gender = "1" if student_gender == "Male"
replace student_gender = "0" if student_gender == "Female"
destring student_gender, replace
label values student_gender gender
* Hand corrections for individual student ids.
replace student_gender = 0 if inlist(student_no, 701, 1001, 1101)
replace student_gender = 1 if student_no == 1224

* Treatment interactions.
gen treatment_gender = treatment*student_gender
gen treatment_cog = treatment*above_dtcog

* One indicator variable per category of the household characteristics.
tab e1_main_income_source, gen(income_source)
tab e5_kind_toilet_facility, gen(toilet_type)
tab e7_own_house, gen (own_house)
tab e6_source_drinking_water, gen(water_source)

******* Add interaction term between treatment and initial non_cog level.
* For each baseline index (noncog, cpcs, rosen, grit):
*   above_X     = 1 if the baseline index is above its within-grade median,
*                 0 if at or below, missing if the baseline index is missing
*                 or the grade is neither 2 nor 4
*   treatment_X = treatment * above_X
* Fixes relative to the previous copy-pasted version:
*   - grit: the comparison with the median now excludes missing baselines.
*     Stata treats missing as larger than any number, so students without a
*     grit baseline were coded as above the median.
*   - rosen and grit: the variable labels said cpcs; they now name the
*     correct index.
foreach idx in noncog cpcs rosen grit {
	gen above_`idx' = .
	foreach g in 2 4 {
		* summarize skips missing values, so no extra missing filter is needed here
		summarize `idx'_bl if grade == `g', detail
		replace above_`idx' = (`idx'_bl > r(p50)) if grade == `g' & !missing(`idx'_bl)
	}
	label variable above_`idx' "1 if initial `idx' is above the median"
	gen treatment_`idx' = treatment*above_`idx'
}

* Above-median indicator for the baseline evaluation, using the median of
* the student's own school.
bys school_no: egen med_eval = median(evaluation_bl)
* Missing compares as larger than any number in Stata, so the comparison is
* restricted to non-missing evaluations. Previously a student without a
* baseline evaluation was coded as above the median.
gen above_eval = (evaluation_bl > med_eval) if !missing(evaluation_bl)
gen treatment_eval = treatment*above_eval

* Interaction with the continuous baseline PTS score.
gen treatment_initialcog = treatment*ptsbl_overall_score

* z-score the adjusted baseline evaluation within grade.
foreach g in 2 4 {
	summarize eval_adjust_bl if grade == `g'
	replace eval_adjust_bl = (eval_adjust_bl - r(mean))/r(sd) if grade == `g'
}

* Interactions of treatment with the continuous baseline measures.
gen treatment_initialnoncog = treatment*noncog_bl
gen treatment_initialrosen = treatment*rosen_bl
gen treatment_initialcpcs = treatment*cpcs_bl
gen treatment_initialgrit = treatment*grit_bl
gen treatment_initialeval = treatment*eval_adjust_bl

*********** Principal component analysis to construct poverty index
***** variables used: income, toilet, num of members, adults, education, meal components
* Values above the thresholds below are set to missing before the PCA.
* NOTE(review): presumably these are survey non-response codes - confirm
* against the questionnaire.
*replace a13_hhh_sick_week_more=. if a13_hhh_sick_week_more>10
replace e2_avgincome_range=.  if e2_avgincome_range>100
replace e8_landamt_decimal=. if e8_landamt_decimal>300

* Dwelling type: codes below 6 -> 1, codes 6 and 7 -> 0, anything else missing.
gen high_quality_building=1 if e10_dwelling_type<6
replace high_quality_building=0 if e10_dwelling_type>5 & e10_dwelling_type<8

* House ownership: codes below 4 -> 1, code 4 -> 0, anything else missing.
gen ownhouse = 1 if e7_own_house<4
replace ownhouse=0 if e7_own_house==4

replace water_source2=0 if water_source4==1

* Toilet facility: codes below 4 -> 2, code 4 -> 1, codes above 4 -> 0.
* The last rule now excludes missing answers. Missing compares as larger
* than any number in Stata, so households with no toilet answer were
* previously given latrine = 0 instead of staying missing.
gen latrine = 2 if e5_kind_toilet_facility<4
replace latrine = 1 if e5_kind_toilet_facility==4
replace latrine = 0 if e5_kind_toilet_facility>4 & !missing(e5_kind_toilet_facility)

* Gas and electricity: values above 300 -> missing, code 2 -> 0.
replace e4_gas_connection=. if e4_gas_connection>300
replace e4_gas_connection=0 if e4_gas_connection==2
replace e3_electricity_connection=. if e3_electricity_connection>300
replace e3_electricity_connection=0 if e3_electricity_connection==2

* wealth1 is the score that predict returns after the PCA.
pca last_income last_income_per_member e2_avgincome_range highest_edu high_quality_building e8_landamt_decimal ownhouse water_source2 latrine e4_gas_connection e3_electricity_connection b14_frequently_eat_meat_fish b15_frequently_eat_egg b16_frequently_drink_milk
predict wealth1, score

* Above-median wealth within grade (missing when wealth1 is missing).
gen above_wealth = .
foreach g in 2 4 {
	summarize wealth1 if grade == `g', detail
	replace above_wealth = (wealth1 > r(p50)) if grade == `g' & !missing(wealth1)
}

* z-score the wealth index within grade.
foreach g in 2 4 {
	summarize wealth1 if grade == `g'
	replace wealth1 = (wealth1 - r(mean))/r(sd) if grade == `g'
}

gen treatment_wealth = treatment*above_wealth
gen treatment_initialwealth = treatment*wealth1



**** PSC variables
* have_psc flags students with a recorded, strictly positive GPA.
gen have_psc = cond(!missing(GPA)&GPA>0,1,0)

* Numeric coding of the letter grades, lower code = better grade.
label define grading 1 "A+" 2 "A" 3 "A-" 4 "B" 5 "B-" 6 "C" 7 "D" 8 "F"

* For every subject this loop
*   1. recodes the letter grade to the numeric code above ("a" -> missing),
*   2. creates psc_pass_X: 1 if the grade is better than F, 0 if F,
*   3. creates X_a, X_b, X_c: 1 if the code is below 4, 6, 7 respectively.
* All indicators are defined only for students with have_psc == 1 AND a
* non-missing grade.
* Fixes relative to the previous per-subject copies:
*   - a blank or "a" grade used to count as a pass (the string is not "F")
*     and, once numeric, as 0 on the A/B/C indicators, because missing
*     compares as larger than any number in Stata. Both are now missing.
*   - the same letter spellings ("A- ", "A_", "B-") are accepted for every
*     subject instead of only the one subject where each was first seen.
* NOTE(review): "a" is presumably "absent" - confirm with the data source.
local psc_subjects Mathematics BangladeshandWorld Bangla English Science Religion
local psc_prefixes math bangla bang eng science religion
local cut_a 4
local cut_b 6
local cut_c 7

local n_subjects : word count `psc_subjects'
forvalues s = 1/`n_subjects' {
	local subj : word `s' of `psc_subjects'
	local pre : word `s' of `psc_prefixes'

	* letter grade -> numeric code, still stored as text at this point
	replace `subj' = "1" if `subj' == "A+"
	replace `subj' = "2" if `subj' == "A"
	replace `subj' = "3" if `subj' == "A-"
	replace `subj' = "3" if `subj' == "A- "
	replace `subj' = "3" if `subj' == "A_"
	replace `subj' = "4" if `subj' == "B"
	replace `subj' = "5" if `subj' == "B-"
	replace `subj' = "6" if `subj' == "C"
	replace `subj' = "7" if `subj' == "D"
	replace `subj' = "8" if `subj' == "F"
	replace `subj' = "." if `subj' == "a"
	destring `subj', replace
	label values `subj' grading

	* pass = any grade better than F (code 8)
	gen psc_pass_`pre' = (`subj' < 8) if have_psc == 1 & !missing(`subj')

	* grade-band indicators: a = A- or better, b = B- or better, c = C or better
	foreach band in a b c {
		gen `pre'_`band' = (`subj' < `cut_`band'') if have_psc == 1 & !missing(`subj')
	}

	* Grade points are only built for Mathematics. Code 5 (B-) has no
	* grade point assigned and stays missing, as before.
	if "`pre'" == "math" {
		gen math_score = 5 if Mathematics == 1
		replace math_score = 4 if Mathematics == 2
		replace math_score = 3.5 if Mathematics == 3
		replace math_score = 3 if Mathematics == 4
		replace math_score = 2 if Mathematics == 6
		replace math_score = 1 if Mathematics == 7
		replace math_score = 0 if Mathematics == 8
	}
}

***** Dummy for schools spending more time -> interact with treatment effect (5 minutes more)
***Alubdi-10; Ciramic-4; Sattola Purbo Bosti-9; Nowa Para; Salna Hazi Trading; Salna East
* NOTE(review): the numbers in the list above do not obviously match the
* school_no codes used below - confirm the mapping.
gen longer_session = inlist(school_no,6,7,9,19,16,18)
gen treatment_longer = treatment*longer_session


**** Create missing dummy for ANCOVA - need to add dummy for the heterogenous regression
* dummy_X = 1 when the baseline measure is missing, 0 otherwise.
* The baseline values themselves are left untouched (no zero filling).
gen dummy_DT2 = missing(DTSbl_per_min_P2)
gen dummy_DT3 = missing(DTSbl_per_min_P3)
gen dummy_pts = missing(ptsbl_overall_score)
gen dummy_rosen = missing(rosen_bl)
gen dummy_cpcs = missing(cpcs_bl)
gen dummy_grit = missing(grit_bl)
gen dummy_noncog = missing(noncog_bl)

* dummy_hom_X = 1 when either treatment interaction is missing.
* The definition does not depend on the outcome, so the six variables are
* identical copies under outcome-specific names.
foreach k in DT2 DT3 pts rosen cpcs grit {
	gen dummy_hom_`k' = missing(treatment_cog) | missing(treatment_noncog)
}

* dummy_het_X additionally switches on when the baseline of outcome X is missing.
foreach k in DT2 DT3 pts rosen cpcs grit {
	gen dummy_het_`k' = missing(treatment_cog) | missing(treatment_noncog) | dummy_`k' == 1
}

* Zero filling of the interaction terms is intentionally disabled:
*replace treatment_cog=0 if missing(treatment_cog)
*replace treatment_noncog=0 if missing(treatment_noncog)

*replace treatment_initialcog=0 if missing(treatment_initialcog)
*replace treatment_initialnoncog=0 if missing(treatment_initialnoncog)

**** standardized the time and score separately
* z-score the DT score and time of the grade-relevant part within grade:
* part P2 for grade 2, part P3 for grade 4. The endline (Post) variable is
* scaled with the baseline mean and sd.
* NOTE(review): the same variables were already standardized by grade in the
* earlier loop over parts 1 to 3, so this pass should leave them unchanged up
* to rounding - confirm whether it is still needed.
foreach m in score time {
	forvalues part = 2/3 {
		* part 2 pairs with grade 2, part 3 with grade 4
		local g = 2*(`part' - 1)
		summarize `m'_DTScoreP`part' if grade == `g'
		replace `m'_DTScoreP`part' = (`m'_DTScoreP`part' - r(mean))/r(sd) if grade == `g'
		replace `m'_Post_DTScoreP`part' = (`m'_Post_DTScoreP`part' - r(mean))/r(sd) if grade == `g'
	}
}

* Zero filling of missing baseline values is intentionally disabled:
*replace score_DTScoreP2=0 if missing(score_DTScoreP2)
*replace score_DTScoreP3=0 if missing(score_DTScoreP3)

*replace time_DTScoreP2=0 if missing(time_DTScoreP2)
*replace time_DTScoreP3=0 if missing(time_DTScoreP3)



* Shorten the questionnaire variable names to their question code, i.e. the
* part of the name before the first underscore (b9_child_discuss_subjects
* becomes b9). capture keeps the script running when a rename fails, for
* example because the short name already exists.
local long_names b9_child_discuss_subjects b10_two_weeks_taught_discuss b11_books_suitable_child b12_often_read_story_child b13_agree_help_study b14_frequently_eat_meat_fish b15_frequently_eat_egg b16_frequently_drink_milk e2_avgincome_range e3_electricity_connection e4_gas_connection e8_landamt_decimal
foreach old of local long_names {
	local short = substr("`old'", 1, strpos("`old'", "_") - 1)
	capture rename `old' `short'
}

* Control variables used in the regressions.
local regressterm a13_hhh_sick_week_more num_member num_adult educated_adult last_income_per_member num_male b9 b10 b11 b12 b13 b14 b15 b16 income_source1 income_source2 income_source3 income_source4 income_source5 income_source6 e2 e3 e4 toilet_type1 toilet_type2 toilet_type3 toilet_type4 toilet_type5 toilet_type6 water_source1 water_source2 water_source3 water_source4 own_house1 own_house2 own_house3 own_house4 own_house5 e8

* For every control:
*   dum_X   = 1 if X was missing, 0 otherwise
*   X       = 99 where it was missing (placeholder value)
*   inter_X = X * dum_X, i.e. 99 where X was missing and 0 elsewhere
foreach ctrl of local regressterm {
	gen dum_`ctrl' = missing(`ctrl')
	replace `ctrl' = 99 if dum_`ctrl' == 1
	gen inter_`ctrl' = `ctrl'*dum_`ctrl'
}

* Grade indicator: 1 for grade 2, 0 otherwise.
gen dum_grade = grade == 2




save "$path_data/data/temp/main_reg_data.dta", replace














